import numpy as np
from math import *
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import sys, random, time
from env import single_expert_dynamics,single_expert_stochastic_dynamics, expert1_reward, expert2_reward, expert3_reward, expert1_cost, expert2_cost, expert3_cost, feature1, feature2, feature3, expert_1_basis_constraint, expert_2_basis_constraint, expert_3_basis_constraint
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
from mpl_toolkits.axes_grid1.inset_locator import inset_axes


def reward_cost_list(trajectories, num_data, horizon=30):
    """Sum the three experts' rewards and costs over each trajectory.

    ``trajectories`` is read as ``num_data`` consecutive groups of ``horizon``
    rows. Within a row, columns 0-1, 2-3 and 4-5 are handed to the expert
    functions as the states of experts 1, 2 and 3, and columns 6, 7 and 8 as
    their respective actions.

    Args:
        trajectories: 2-D array indexed as ``trajectories[row, column]``
            holding at least ``num_data * horizon`` rows and 9 columns.
        num_data: Number of trajectories to evaluate.
        horizon: Number of rows per trajectory. Defaults to 30, the value
            that used to be hard-coded.

    Returns:
        A ``(reward_list, cost_list)`` tuple. Each list has ``num_data``
        entries; entry ``i`` is the sum over the rows of trajectory ``i`` of
        the three experts' rewards (respectively costs).

    Raises:
        IndexError: For a NumPy array input, when a trajectory group holds
            fewer than ``horizon`` rows.
    """

    def column(values):
        # np.asmatrix is the function the alias np.mat used to point to; the
        # alias itself was removed in NumPy 2.0. The copy keeps the resulting
        # matrix from sharing memory with ``trajectories``.
        return np.asmatrix(np.copy(values)).T

    # Fixed reward weights (omega) and cost weights (theta), one per expert.
    omega1 = np.asmatrix([1.0, -1.0]).T
    omega2 = np.asmatrix([1.0, -1.0]).T
    omega3 = np.asmatrix([1.0, -1.0]).T
    theta1 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).T
    theta2 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]).T
    theta3 = np.asmatrix([1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0]).T

    reward_list = []
    cost_list = []
    for i in range(num_data):
        single_trajectory = trajectories[horizon * i:horizon * (i + 1), :]
        reward = 0.0
        cost = 0.0
        for j in range(horizon):
            row = single_trajectory[j]
            state1 = column(row[0:2])
            state2 = column(row[2:4])
            state3 = column(row[4:6])
            action1 = column(row[6])
            action2 = column(row[7])
            action3 = column(row[8])
            single_reward = (expert1_reward(omega1, state1, action1)
                             + expert2_reward(omega2, state2, action2)
                             + expert3_reward(omega3, state3, action3))
            single_cost = (expert1_cost(theta1, state1, action1)
                           + expert2_cost(theta2, state2, action2)
                           + expert3_cost(theta3, state3, action3))
            reward = reward + single_reward
            cost = cost + single_cost
        reward_list.append(reward)
        cost_list.append(cost)
    return reward_list, cost_list

# Run settings. ``iterations`` is the number of points on the x axis of every
# curve plotted below.
iterations = 51
num_trials = 100

# Every cost series is divided by this factor before plotting.
COST_SCALE = 1000.0

# np.loadtxt already returns an ndarray, so no extra np.array(...) wrapper is
# needed; the division produces a fresh array in any case.
cost1_mean = np.loadtxt("cost1_mean_file.txt", dtype=float) / COST_SCALE
cost2_mean = np.loadtxt("cost2_mean_file.txt", dtype=float) / COST_SCALE
cost3_mean = np.loadtxt("cost3_mean_file.txt", dtype=float) / COST_SCALE
cost4_mean = np.loadtxt("cost4_mean_file.txt", dtype=float) / COST_SCALE

cost1_sd = np.loadtxt("cost1_sd_file.txt", dtype=float) / COST_SCALE
cost2_sd = np.loadtxt("cost2_sd_file.txt", dtype=float) / COST_SCALE
cost3_sd = np.loadtxt("cost3_sd_file.txt", dtype=float) / COST_SCALE
cost4_sd = np.loadtxt("cost4_sd_file.txt", dtype=float) / COST_SCALE

likelihood1 = np.loadtxt("likelihood1_file.txt", dtype=float)
likelihood2 = np.loadtxt("likelihood2_file.txt", dtype=float)
likelihood3 = np.loadtxt("likelihood3_file.txt", dtype=float)
likelihood4 = np.loadtxt("likelihood4_file.txt", dtype=float)

# Constant reference value drawn as the 'Experts' series. Its length follows
# ``iterations`` (instead of a separate literal 51) so it always matches the
# x axis it is plotted against.
likelihood = np.full(iterations, -115.2174)


# ---- Figure (a): four scaled cumulative-cost curves with shaded bands ----
plt.rcParams.update({'font.size': 14})

# x coordinates: one per iteration for the main panel, the first three for
# the zoomed inset.
axis = np.arange(0, iterations)
subaxis = np.arange(0, 3)

# One entry per curve: (mean array, matching *_sd array, line style, extra
# keyword arguments for plot). Only the first curve carries a legend label.
_cost_curves = [
    (cost1_mean, cost1_sd, '-', {'label': 'Learner $1$'}),
    (cost2_mean, cost2_sd, '--', {}),
    (cost3_mean, cost3_sd, '-.', {}),
    (cost4_mean, cost4_sd, ':', {}),
]

fig, ax = plt.subplots()
for _mean, _sd, _style, _extra in _cost_curves:
    ax.plot(axis, _mean, _style, **_extra)
    # Shaded band of one *_sd value on either side of the mean.
    ax.fill_between(axis, _mean - _sd, _mean + _sd, alpha=0.2)
ax.set_xticks(np.arange(0, iterations, 10))
ax.set_title('(a) Cumulative Cost (scaled)')
ax.set_xlabel('iterations')
ax.legend(loc=1)

# Inset showing the first three iterations; width and height are 20% of the
# box given by bbox_to_anchor.
axins = inset_axes(
    ax,
    width="20%",
    height="20%",
    bbox_to_anchor=(-300, -50, 650, 450),
    loc=5,
)
for _mean, _sd, _style, _ in _cost_curves:
    _head_mean = _mean[0:3]
    _head_sd = _sd[0:3]
    axins.plot(subaxis, _head_mean, _style)
    axins.fill_between(subaxis, _head_mean - _head_sd, _head_mean + _head_sd, alpha=0.2)
# Kept as a pyplot call so it targets whichever axes is current, exactly as
# before.
plt.xticks(np.arange(0, 3, 1))
# Outline the zoomed region on the main axes and connect it to the inset.
mark_inset(ax, axins, loc1=2, loc2=4, ec="0.5")
plt.savefig('cumulative_cost.pdf')
plt.show()

 

# ---- Figure (b): four likelihood curves against the constant reference ----
# One entry per curve: (series, line style).
_likelihood_curves = [
    (likelihood1, '-'),
    (likelihood2, '--'),
    (likelihood3, '-.'),
    (likelihood4, ':'),
]

fig, ax = plt.subplots()
for _series, _style in _likelihood_curves:
    ax.plot(axis, _series, _style)
# Constant reference series, drawn as markers; the only labelled series here.
ax.plot(axis, likelihood, '^', alpha=1.0, markersize=3, label='Experts')
ax.set_xticks(np.arange(0, iterations, 10))
ax.set_title('(b) Likelihood')
ax.set_xlabel('iterations')
ax.legend()

# Inset showing the first three iterations; width and height are 20% of the
# box given by bbox_to_anchor.
axins = inset_axes(
    ax,
    width="20%",
    height="20%",
    bbox_to_anchor=(-300, -80, 650, 450),
    loc=5,
)
for _series, _style in _likelihood_curves:
    axins.plot(subaxis, _series[0:3], _style)
# Kept as a pyplot call so it targets whichever axes is current, exactly as
# before.
plt.xticks(np.arange(0, 3, 1))
# Outline the zoomed region on the main axes and connect it to the inset.
mark_inset(ax, axins, loc1=2, loc2=4, ec="0.5")

plt.savefig('likelihood.pdf')
plt.show()


